import string, re

# Mapping of letter frequencies
letter_freq = {'A':.082,'B':.015,'C':0.028,'D':.043,'E':.127,'F':.022,'G':.02,'H':.061,\
		'I':.07,'J':.002,'K':.008,'L':.04,'M':.024,'N':.067,'O':.075,'P':.019,'Q':.001,\
		'R':.06,'S':.063,'T':.091,'U':.028,'V':.01,'W':.023,'X':.001,'Y':.02,'Z':.001}
alphabet_length = 26 # Number of letters in the alphabet
alphabet_offset = 65 # Offset of 'A' for ASCII characters
english_IC = 0.065 # IC of english
key_lengths = 10 # Number of key lengths to test
# Encrypted message
input = 'XKJUROWMLLPXWZNPIMBVBQJCNOWXPCCHHVVFVSLLFVXHAZITYXOHULX \
	QOJAXELXZXMYJAQFSTSRULHHUCDSKBXKNJQIDALLPQSLLUHIAQFPBPC \
	IDSVCIHWHWEWTHBTXRLJNRSNCIHUVFFUXVOUKJLJSWMAQFVJWJSDYLJ \
	OGJXDBOXAJULTUCPZMPLIWMLUBZXVOODYBAFDSKXGQFADSHXNXEHSAR \
	UOJAQFPFKNDHSAAFVULLUWTAQFRUPWJRSZXGPFUTJQIYNRXNYNTWMHC'

# letter_freq = {'A':0.7,'B':0.2,'C':0.1}
# alphabet_length = 3
# alphabet_offset = 65
# english_IC = 0.54
# key_lengths = 4
# input = 'ABCBABBBAC'

def calculate_IC(str):
	'''Calculates and returns the IC for a given string'''
	IC = 0
	for letter in letter_freq.keys():
		count = str.count(letter)	# count the frequency of every letter
		IC += float(count * (count-1))/float(len(str) * (len(str)-1))
	return IC

def get_substring(str, keylen):
	'''Returns a list of substrings of an offset of mod keylen'''
	ret = []
 	for incr in range(keylen): # Iterate through the positions within each key
 		substring = ''
 		for char in range(len(str)): # Get every X characters from the input
 			if char % keylen == incr:
 				substring += str[char]
 		ret.append(substring)
 	return ret

def char_shift(char, x):
	'''Shifts a given character x amount and returns it'''
	i = ord(char) - alphabet_offset
	i += x
	while i < 0:
		i += alphabet_length
	return chr(i + alphabet_offset)

if __name__ == '__main__':
	# input = raw_input("Enter string: ")

	# Remove all whitespace from the input string and convert the string to uppercase
	input_stripped = re.sub('\s', '', input)
	input_stripped = input_stripped.upper()

	# Print out the sample size (length of input)
	sample_size = len(input_stripped)
	print "Sample Size: {0}\n".format(sample_size)

	# Calculate and print out the IC with keys of length 1-9
	print "Index of Coincidence (0.065 = IC of english):"
	freq_avg = dict()
	freq_diff = dict()

	# Create a mapping of IC to key lengths
	for length in range(1,key_lengths):
		avg_IC = 0
		substrings = get_substring(input_stripped, length) # Get a list of substrings
		print "M = {0}:".format(length),
		for str in substrings: # Calculate the average IC of all the substrings
			IC = calculate_IC(str)
			avg_IC += IC
			print "{0:.2}".format(IC),
		print " : {0:.2}".format(avg_IC/length)
		freq_avg[avg_IC/length] = length # Store the key length and resulting IC

	# Create a mapping of |IC - 0.065| to key lengths
	for entry in freq_avg:
		freq_diff[abs(entry - english_IC)] = freq_avg[entry]

	# Sort this mapping and print out the most likely key lengths
	keys = freq_diff.keys()
	keys.sort()
	print "\nThe most likely key lengths are:"
	for entry in keys:
		print "{0} : {1:.3}".format(freq_diff[entry], entry)

	# Using the found keylength, calculate the most likely decrypting key
	print "\nAssuming the key has a length of {0} characters".format(freq_diff[keys[0]])
	key_length = freq_diff[keys[0]]

	# Now we shift each substring and try to find a shift where the IC is closest to 0.065
	print "The most likely key solutions are (read down on the left column):"
	substrings = get_substring(input_stripped, key_length) # First divide the input string by the key length
	key_final = ''

	for substr in substrings:
		freq_avg.clear() # First clear out both frequency dictionaries
		freq_diff.clear()

		# Create a mapping of IC to letters
		for g in range(alphabet_length): # Iterate through each letter + shifted value
			IC = 0
			for i in range(alphabet_length): # Iterate through each letter
				# Calculate the IC using M_g = Sum(i=0-25) : p_i * f_(i+g) / n'
				IC += float(letter_freq[chr(i+alphabet_offset)] * substr.count(chr((i+g)%alphabet_length + alphabet_offset))) / float(sample_size/key_length)
			freq_avg[IC] = chr((g)%alphabet_length + alphabet_offset) # Store the letter and calculated IC

		# Create a mapping of |IC - 0.065| to letters
		for entry in freq_avg:
			freq_diff[abs(entry - english_IC)] = freq_avg[entry]

		# Sort this mapping and print out the key
		keys = freq_diff.keys()
		keys.sort()
		output = ''
		key_final += freq_diff[keys[0]]
		for entry in keys:
			output += freq_diff[entry] + ' '
		print output
		for i in range(3):
			print "{0} : {1:.3}  ".format(freq_diff[keys[i]], keys[i])
		print

	# Decode the encrypted message using the key
	key_ind = 0
	output = ''
	for i in range(sample_size):
		output += char_shift(input_stripped[i], 0-ord(key_final[key_ind])-alphabet_offset)
		key_ind += 1
		key_ind %= key_length

	# Print the decoded message
	print "Encryption key: {0}, Decrypted message:".format(key_final)
	print output

